In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from scipy.misc import logsumexp
from scipy.special import erf
from scipy.stats import maxwell

import astropy.coordinates as coord
import gary.coordinates as gc
import emcee
import triangle

In [31]:
halo_dispersion = 106. # km/s - assumed radial-velocity dispersion of the Galactic halo (fixed below)
rrl_vr_err = 15. # km/s - typical RR Lyrae radial-velocity uncertainty
mgiant_vr_err = 2. # km/s - typical M-giant radial-velocity uncertainty

def ln_normal(x, mu, var):
    """Log-density of a Gaussian with mean `mu` and variance `var`, evaluated at `x` (vectorized)."""
    resid_sq = (x - mu) ** 2
    return -0.5 * (np.log(2 * np.pi) + np.log(var) + resid_sq / var)

First I'll fit a velocity model to the M-giants to derive the gradient and dispersion of the TriAnd population.


In [32]:
# Load the M-giant catalog (columns become named fields of a structured array).
# NOTE(review): hardcoded absolute local path -- breaks for anyone else; consider a DATA_DIR constant
mgiants = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/triand_giants.txt", names=True)
mgiants.dtype.names


Out[32]:
('l', 'b', 'vr', 'ra', 'dec')

In [33]:
# Scatter of M-giant radial velocity vs. Galactic longitude.
# Bug fix: the original passed only linestyle='none' with no marker, which
# draws nothing at all -- a marker must be given for points to appear.
plt.plot(mgiants['l'], mgiants['vr'], marker='o', linestyle='none')
plt.ylim(-300,300)


Out[33]:
(-300, 300)

In [34]:
# mixture model - f_ol is outlier fraction
def ln_prior(p):
    """Log-prior for the M-giant mixture parameters.

    p = (m, b, V, halo_v, f_ol): velocity gradient, intercept, TriAnd
    dispersion, mean halo velocity, and outlier (halo) fraction.
    Uniform within the box below with a 1/V prior on the dispersion;
    -inf outside.
    """
    m, b, V, halo_v, f_ol = p

    in_bounds = (
        -50. <= m <= 0.
        and 0. <= b <= 500.
        and V > 0.
        and 0. <= f_ol <= 1.
        and -100. <= halo_v <= 100.
    )
    return -np.log(V) if in_bounds else -np.inf

def Mg_likelihood(p, l, vr, sigma_vr):
    """Per-star log-likelihoods of the two mixture components.

    Returns a (2, N) array: row 0 is the TriAnd (linear trend in l)
    component, row 1 the fixed-dispersion halo component.  Relies on the
    module-level `ln_normal` and `halo_dispersion`.
    """
    m, b, sigma, halo_v, _ = p
    # observational error and intrinsic dispersion add in quadrature
    total_var = sigma_vr ** 2 + sigma ** 2
    triand_lnp = ln_normal(vr, m * l + b, total_var)
    halo_lnp = ln_normal(vr, halo_v, halo_dispersion ** 2)
    return np.array([triand_lnp, halo_lnp])

def Mg_ln_likelihood(p, *args):
    """Total per-star mixture log-likelihood for the M-giant model.

    Combines the two component log-densities from `Mg_likelihood` with
    weights (1 - f_ol) and f_ol via a weighted logsumexp over components.
    """
    # scipy.misc.logsumexp was removed from scipy; import locally from
    # scipy.special so this works regardless of the top-of-file import
    from scipy.special import logsumexp

    f_ol = p[4]
    lnlikes = Mg_likelihood(p, *args)

    # Mixture weights for the two components.  Bug fix: the original
    # unpacked p into (m, b, V, halo_v, f_ol) and then reassigned `b` as
    # this weights array, shadowing the intercept -- harmless here but a
    # trap for future edits; use a distinct name.
    weights = np.zeros_like(lnlikes)
    weights[0] = 1 - f_ol
    weights[1] = f_ol

    return logsumexp(lnlikes, b=weights, axis=0)
    
def Mg_ln_posterior(p, *args):
    """Log-posterior for the M-giant model: prior plus summed mixture log-likelihood."""
    prior = ln_prior(p)
    if np.isinf(prior):
        return -np.inf
    return prior + Mg_ln_likelihood(p, *args).sum()

def outlier_prob(p, *args):
    """Posterior probability that each star belongs to the halo (outlier) component.

    Robustness fix: the original exponentiated the component
    log-likelihoods directly, which can underflow to 0/0 for extreme
    velocities; compute the membership probability in log space instead.
    """
    f_ol = p[4]
    lnp_triand, lnp_halo = Mg_likelihood(p, *args)
    # log-odds of TriAnd vs. halo membership; p_halo = 1 / (1 + odds)
    ln_odds = np.log(1 - f_ol) + lnp_triand - (np.log(f_ol) + lnp_halo)
    return 1.0 / (1.0 + np.exp(ln_odds))

In [35]:
# Ensemble sampler over the 5 M-giant model parameters.
# NOTE(review): `dim`/`lnpostfn` keywords are the emcee v2 API; emcee >= 3
# renamed them to `ndim`/`log_prob_fn` -- confirm the installed version.
nwalkers = 32
Mg_sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=5, lnpostfn=Mg_ln_posterior, 
                                   args=(mgiants['l'],mgiants['vr'],mgiant_vr_err))

In [36]:
# initialize walkers
# Start walkers in a tight ball around a reasonable guess for
# (slope, intercept, dispersion, halo mean velocity, outlier fraction).
p0 = np.zeros((nwalkers,Mg_sampler.dim))
p0[:,0] = np.random.normal(-1, 0.1, size=nwalkers)
p0[:,1] = np.random.normal(150, 0.1, size=nwalkers)
p0[:,2] = np.random.normal(25, 0.5, size=nwalkers)
p0[:,3] = np.random.normal(0., 10., size=nwalkers)
p0[:,4] = np.random.normal(0.1, 0.01, size=nwalkers)

# Sanity check: every walker must start at a finite posterior value,
# otherwise emcee will silently never move it.
for pp in p0:
    lnp = Mg_ln_posterior(pp, *Mg_sampler.args)
    if not np.isfinite(lnp):
        print("you suck")

In [37]:
# 100-step burn-in, then reset the chain and run the production sampling
# from the final burn-in positions.
pos,prob,state = Mg_sampler.run_mcmc(p0, N=100)
Mg_sampler.reset()
pos,prob,state = Mg_sampler.run_mcmc(pos, N=1000)

In [38]:
# Corner plot of the M-giant model posterior samples.
# NOTE(review): the `triangle` package was renamed `corner`; with a modern
# install this should be `import corner; corner.corner(...)`.
fig = triangle.corner(Mg_sampler.flatchain,
                      labels=[r'$\mathrm{d}v/\mathrm{d}l$', r'$v_0$', r'$\sigma_v$', r'$v_{\rm halo}$', r'$f_{\rm halo}$'])



In [39]:
# Maximum a posteriori (MAP) parameter values: the flattened-chain sample
# with the highest log-probability.
map_dvdl,map_v0,map_sigma_triand,map_vhalo,map_fhalo = Mg_sampler.flatchain[Mg_sampler.flatlnprobability.argmax()]

From this, we can run a mixture model and ask what fraction of our RR Lyrae could belong to the same population as the M-giants, as defined by their velocities.


In [19]:
# rrl = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/RRL_ALL.txt", names=True, dtype=None)
# Load the RR Lyrae catalog.
# Fixes: `skiprows` was removed from np.genfromtxt (the keyword is
# `skip_header`), and the Python-2 `print` statement is now a function call.
# NOTE(review): hardcoded absolute local path -- consider a DATA_DIR constant.
rrl = np.genfromtxt("/Users/adrian/projects/triand-rrlyrae/data/TriAnd_RRL_19mar15.csv", 
                    skip_header=0, dtype=None, names=True, delimiter=',')
print(rrl.dtype.names)


('oname', 'name', 'ra', 'dec', 'amp', 'Vsys', 'Err')

In [25]:
# Bug fix: `u` (astropy.units) is used throughout this cell but was never
# imported anywhere in the notebook -- this cell raised NameError.
import astropy.units as u

c = coord.SkyCoord(ra=rrl['ra']*u.deg, dec=rrl['dec']*u.deg)
rrl_l = c.galactic.l.degree
# heliocentric -> Galactic-standard-of-rest velocities; vcirc/vlsr match
# the values adopted by Sheffield et al. 2014
rrl_vgsr = gc.vhel_to_vgsr(c, rrl['Vsys']*u.km/u.s,
                           vcirc=236.*u.km/u.s,
                           vlsr=[11.1,12.24,7.25]*u.km/u.s).value # same as Sheffield et al. 2014
verr = rrl['Err']

The likelihood is something like this:

We'll fix the mean and dispersion of the halo velocity distribution: $$ \mu_{\rm h},\sigma_{\rm h} = (0.,106)~{\rm km~s}^{-1} $$

$f_{\rm h}$ is the fraction of halo stars in the sample of RR Lyrae.

We'll infer the halo fraction using the likelihood: $$ p(D,v \,|\, f_{\rm h}) = p(D \,|\, v) \, p(v \,|\, f_{\rm h})\\ p(v \,|\, f_{\rm h}) = f_{\rm h}\,p(v \,|\, \mu_{\rm h},\sigma_{\rm h}) + (1-f_{\rm h})\,p(v \,|\, v_{0,\rm TriAnd},{\rm d}v/{\rm d}l,\sigma_{\rm TriAnd}) $$

With observed velocities, we instead use the marginal likelihood: $$ p(D \,|\, f_{\rm h}) = \int p(D,v \,|\, f_{\rm h}) dv $$


In [21]:
# Fixed mean and dispersion (not variance) of the halo velocity distribution.
mu_h,sigma_h = (0., 106.)  # km/s - Cite W. Brown (2009)?

In [42]:
def rrl_ln_likelihood(p, v_obs, v_err, v0, dv_dl, sigma_triand, vhalo, l):
    """Marginal log-likelihood of the observed RR Lyrae velocities.

    Two-component mixture: halo fraction f_h = p[0] with mean `vhalo` and
    the module-level dispersion `sigma_h`; TriAnd fraction (1 - f_h) with
    linear trend dv_dl * l + v0 and dispersion `sigma_triand`.  The
    observational error is convolved into each component's variance.
    """
    # scipy.misc.logsumexp was removed from scipy; import locally from
    # scipy.special so this works regardless of the top-of-file import
    from scipy.special import logsumexp

    f_h = p[0]

    halo_var = v_err**2 + sigma_h**2
    triand_var = v_err**2 + sigma_triand**2

    lnlikes = np.array([ln_normal(v_obs, vhalo, halo_var),
                        ln_normal(v_obs, dv_dl*l + v0, triand_var)])

    # mixture weights for (halo, TriAnd)
    weights = np.zeros_like(lnlikes)
    weights[0] = f_h
    weights[1] = 1 - f_h

    return logsumexp(lnlikes, b=weights, axis=0)

def rrl_ln_prior(p):
    """Uniform prior on the halo fraction f_h = p[0], supported on [0, 1]."""
    f_h = p[0]
    return 0. if 0. <= f_h <= 1. else -np.inf

def rrl_ln_posterior(p, *args):
    """Log-posterior for the RR Lyrae halo fraction.

    Bug fix: the original called `ln_prior` (the 5-parameter M-giant prior,
    which crashes unpacking a 1-element p) and an undefined `ln_likelihood`;
    it must use `rrl_ln_prior` and `rrl_ln_likelihood`.
    """
    lnp = rrl_ln_prior(p)
    if np.isinf(lnp):
        return -np.inf

    return lnp + rrl_ln_likelihood(p, *args).sum()

Just compute likelihood for many posterior samples from M-giants


In [43]:
fig,axes = plt.subplots(1,2,figsize=(12,6))

# Evaluate the RR Lyrae likelihood curve over f_h for 100 random posterior
# samples of the M-giant model parameters.
# Bug fix: the original reused `i` for both the outer sample index and the
# inner enumerate index, shadowing the outer loop variable.
fs = np.linspace(0.,1.,100)
for idx in np.random.randint(len(Mg_sampler.flatchain), size=100):
    dvdl,v0,sigmav,vhalo,fh = Mg_sampler.flatchain[idx]

    ls = np.zeros_like(fs)
    for j,f in enumerate(fs):
        ls[j] = rrl_ln_likelihood([f], rrl_vgsr, verr, v0, dvdl, sigmav, vhalo, rrl_l).sum()

    # plot against (1 - f_h), i.e. the TriAnd fraction
    axes[0].plot(1-fs, ls, marker=None, alpha=0.25, c='k')
    axes[1].plot(1-fs, np.exp(ls-ls.max()), marker=None, alpha=0.25, c='k')



In [44]:
# Sample sizes, used below to convert the inferred TriAnd fraction into an
# RR Lyrae : M-giant number ratio.
nrrlyrae = 77.
nmgiants = 74.

In [46]:
fig,ax = plt.subplots(1,1,figsize=(6,6))

# Same likelihood curves as above, but with the x-axis rescaled to the
# RR Lyrae : M-giant number ratio.
# Bug fix: the original reused `i` for both loop indices, shadowing the
# outer sample index.
fs = np.linspace(0.,1.,100)
for idx in np.random.randint(len(Mg_sampler.flatchain), size=100):
    dvdl,v0,sigmav,vhalo,fh = Mg_sampler.flatchain[idx]

    ls = np.zeros_like(fs)
    for j,f in enumerate(fs):
        ls[j] = rrl_ln_likelihood([f], rrl_vgsr, verr, v0, dvdl, sigmav, vhalo, rrl_l).sum()

    # NOTE(review): the 0.8 divisor is presumably a completeness
    # correction for the RR Lyrae sample -- TODO confirm and name it
    ax.plot((1-fs) * nrrlyrae/0.8 / nmgiants, np.exp(ls-ls.max()), marker=None, alpha=0.25, c='k')

ax.set_xlabel(r"$f_{\rm RR:MG}$", fontsize=24)
ax.set_xlim(0,1)


Out[46]:
(0, 1)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

Sample it


In [101]:
nwalkers = 32
# Bug fix: the original referenced an undefined `ln_posterior` and an
# undefined `vr_err`, and indexed fields rrl['vgsr'] / rrl['l'] that do not
# exist in the loaded CSV (its fields are oname, name, ra, dec, amp, Vsys,
# Err).  Use the rrl_* model functions with the derived rrl_vgsr / rrl_l
# arrays and the MAP values of the M-giant model for the fixed parameters.
sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=rrl_ln_posterior,
                                args=(rrl_vgsr, verr, map_v0, map_dvdl, map_sigma_triand, map_vhalo, rrl_l))

p0 = np.zeros((nwalkers,sampler.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)

# Sanity check: all walkers must start at finite posterior probability.
for pp in p0:
    lnp = rrl_ln_posterior(pp, *sampler.args)
    if not np.isfinite(lnp):
        print("you suck")

In [102]:
# Burn-in, reset, then production run from the burn-in end positions.
pos,prob,state = sampler.run_mcmc(p0, N=100)
sampler.reset()
pos,prob,state = sampler.run_mcmc(pos, N=1000)

In [103]:
# Direct scan of the RR Lyrae likelihood over the halo fraction.
# Bug fix: the original called an undefined `ln_likelihood` with the
# nonexistent rrl['vgsr'] / rrl['l'] fields and an undefined `vr_err`; use
# rrl_ln_likelihood with the derived arrays and the MAP model parameters.
fs = np.linspace(0.,1.,100)
ls = np.zeros_like(fs)
for i,f in enumerate(fs):
    ls[i] = rrl_ln_likelihood([f], rrl_vgsr, verr, map_v0, map_dvdl, map_sigma_triand, map_vhalo, rrl_l).sum()

fig,axes = plt.subplots(1,2,figsize=(12,6))
axes[0].plot(fs,ls)
axes[1].plot(fs,np.exp(ls-ls.max()))
# `true_f_h` is only defined by the mock-data cell further down; guard so
# this cell also works on a fresh top-to-bottom run
if 'true_f_h' in globals():
    axes[1].axvline(true_f_h)
# axes[1].set_xlim(0.8,1)


Out[103]:
<matplotlib.lines.Line2D at 0x10d882410>

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

Here I'll generate some fake data and observe it ...


In [17]:
np.random.seed(42)

# Build a mock sample: a fraction true_f_h of halo stars, remainder TriAnd.
N = 1000
true_f_h = 1.
N_h = int(true_f_h*N)
N_T = N-int(true_f_h*N)
# NOTE(review): mu_T and sigma_T are never defined in this notebook; with
# true_f_h = 1. the TriAnd component is empty so this happens to run, but
# any true_f_h < 1 will raise NameError -- define them before use.
true_v = np.append(np.random.normal(mu_h, sigma_h, size=N_h),
                   np.random.normal(mu_T, sigma_T, size=N_T))

# generate with the wrong distribution
# true_v = np.append(np.random.normal(mu_h, sigma_h-25, size=N_h),
#                    np.random.normal(mu_T, sigma_T, size=N_T))

print("N halo: {}, N TriAnd: {}".format(N_h, N_T))

# "Observe" the first 18 stars with 15 km/s Gaussian errors.
v_err_18 = 15.
v_obs_18 = np.random.normal(true_v, v_err_18)
v_obs_18 = v_obs_18[:18]

# NOTE(review): named "_179" but sliced to 233 stars -- confirm which
# sample size is intended.
v_err_179 = 15.
v_obs_179 = np.random.normal(true_v, v_err_179)
v_obs_179 = v_obs_179[:233]


N halo: 1000, N TriAnd: 0

In [18]:
# Fit the halo fraction to the small (18-star) mock sample.
# NOTE(review): `ln_posterior` is not defined in this notebook, and the
# current rrl_* posterior also requires the TriAnd trend parameters -- this
# cell targets an older 1-parameter model signature and will not run as-is.
nwalkers = 32
mock_sampler_1 = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=ln_posterior, 
                                        args=(v_obs_18, v_err_18))

p0 = np.zeros((nwalkers,mock_sampler_1.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)

# sanity check: walkers must start at finite posterior probability
for pp in p0:
    lnp = ln_posterior(pp, *mock_sampler_1.args)
    if not np.isfinite(lnp):
        print("you suck")
        
pos,prob,state = mock_sampler_1.run_mcmc(p0, N=100)
mock_sampler_1.reset()
pos,prob,state = mock_sampler_1.run_mcmc(pos, N=1000)

In [19]:
# Same fit for the larger mock sample.
# NOTE(review): `ln_posterior` is not defined in this notebook (see the
# note on the previous cell) -- stale cell from an earlier model version.
nwalkers = 32
mock_sampler_2 = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=ln_posterior, 
                                        args=(v_obs_179, v_err_179))

p0 = np.zeros((nwalkers,mock_sampler_2.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)

# sanity check: walkers must start at finite posterior probability
for pp in p0:
    lnp = ln_posterior(pp, *mock_sampler_2.args)
    if not np.isfinite(lnp):
        print("you suck")
        
pos,prob,state = mock_sampler_2.run_mcmc(p0, N=100)
mock_sampler_2.reset()
pos,prob,state = mock_sampler_2.run_mcmc(pos, N=1000)

Real data


In [20]:
# NOTE(review): `d` is never defined in this notebook -- stale cell from an
# earlier version (column 3 presumably held the radial velocities).
len(d[:,3])


Out[20]:
18

In [21]:
# NOTE(review): `ln_likelihood` and `d` are not defined in this notebook --
# this cell targets an older 2-argument likelihood signature and will not
# run as-is; see rrl_ln_likelihood above for the current model.
fs = np.linspace(0.,1.,100)
ls = np.zeros_like(fs)
for i,f in enumerate(fs):
    ls[i] = ln_likelihood([f], d[:,3], 10.).sum()
    
fig,axes = plt.subplots(1,2,figsize=(12,6))
axes[0].plot(fs,ls)
axes[1].plot(fs,np.exp(ls-ls.max()))
axes[1].axvline(true_f_h)
# axes[1].set_xlim(0.8,1)


Out[21]:
<matplotlib.lines.Line2D at 0x10ae60110>

In [22]:
# NOTE(review): `ln_posterior` and `d` are not defined in this notebook --
# stale cell from an earlier model version; the current model would use
# rrl_ln_posterior with the full argument list.
vr_err = 15. # km/s
nwalkers = 32
sampler = emcee.EnsembleSampler(nwalkers=nwalkers, dim=1, lnpostfn=ln_posterior, 
                                args=(d[:,3], vr_err))

p0 = np.zeros((nwalkers,sampler.dim))
p0[:,0] = np.random.normal(0.5, 0.01, size=nwalkers)

# sanity check: walkers must start at finite posterior probability
for pp in p0:
    lnp = ln_posterior(pp, *sampler.args)
    if not np.isfinite(lnp):
        print("you suck")
        
pos,prob,state = sampler.run_mcmc(p0, N=100)
sampler.reset()
pos,prob,state = sampler.run_mcmc(pos, N=1000)

In [23]:
# Posterior on the number of TriAnd RR Lyrae, for real and mock data.
# Fix: `normed` was deprecated and then removed from matplotlib's hist;
# `density` is the replacement with the same meaning.
# NOTE(review): the hardcoded 179 scale factor and the 109 M-giant line
# disagree with nrrlyrae/nmgiants defined above -- confirm the sample sizes.
fig,axes = plt.subplots(1,2,figsize=(14,6), sharex=True, sharey=True)
axes[0].hist((1-sampler.flatchain)*179, bins=np.linspace(0.,179.,50), density=True);
axes[0].set_xlabel(r"Number of RR Lyrae in Triand")
axes[0].set_ylabel(r"Probability")
axes[0].set_title("RR Lyrae data")
axes[0].axvline(109, linewidth=3., linestyle='dashed', color='#555555')
axes[0].text(104, 0.04, "Number of M-giants\n in Triand", fontsize=18, ha='right')
axes[0].set_yticklabels([])
[ll.set_fontsize(18) for ll in axes[0].xaxis.get_ticklabels()]

axes[1].hist((1-mock_sampler_1.flatchain)*179, bins=np.linspace(0.,179.,50), alpha=0.5, density=True, label='18 stars');
axes[1].hist((1-mock_sampler_2.flatchain)*179, bins=np.linspace(0.,179.,50), alpha=0.5, density=True, label='179 stars');
axes[1].set_xlabel(r"Number of RR Lyrae in Triand")
axes[1].legend(fontsize=18)
axes[1].set_title("Simulated data")
[ll.set_fontsize(18) for ll in axes[1].xaxis.get_ticklabels()]

# fig.savefig("/Users/adrian/projects/triand-rrlyrae/plots/ftriand.pdf")


Out[23]:
[None, None, None, None, None, None, None, None, None, None]

In [ ]: